In [ ]:
tmp_dir = "../data"
In [ ]:
!mkdir -p ../data
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/north/daily/data/NH_seaice_extent_final.csv
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/north/daily/data/NH_seaice_extent_nrt.csv
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/south/daily/data/SH_seaice_extent_final.csv
!wget -P ../data -qN ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02135/south/daily/data/SH_seaice_extent_nrt.csv
Variables to set before running:
In [ ]:
climatology_years = (1981, 2010)
In [ ]:
import datetime as dt
import numpy as np
import os
import pandas as pd
from pandas import ExcelWriter
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
pd.options.display.mpl_style = 'default'
In [ ]:
def parse_the_date(year, mm, dd):
return dt.date(int(year), int(mm), int(dd))
def slurp_csv(filename):
data = pd.read_csv(filename, header = None, skiprows=2,
names=["year", "mm", "dd", "extent", "missing", "source"],
parse_dates={'date':['year', 'mm', 'dd']},
date_parser=parse_the_date, index_col='date')
data = data.drop(['missing', 'source'], axis=1)
return data
def read_a_hemisphere(hemisphere):
the_dir = '../data'
final_prod_filename = os.path.join(the_dir, '{hemi}H_seaice_extent_final.csv'.format(hemi=hemisphere[0:1].upper()))
nrt_prod_filename = os.path.join(the_dir, '{hemi}H_seaice_extent_nrt.csv'.format(hemi=hemisphere[0:1].upper()))
final = slurp_csv(final_prod_filename)
nrt = slurp_csv(nrt_prod_filename)
all_data = pd.concat([final, nrt])
all_data.index = pd.to_datetime(all_data.index)
all_data = all_data.reindex(index=pd.date_range('1978-10-25', dt.date.today().strftime('%Y-%m-%d')))
all_data['hemi'] = hemisphere
return all_data
In [ ]:
def interpolate_column_excluding_extended_missing_periods(df_in, column, interpolated_column):
df = df_in.copy()
df['backfill'] = df[column].fillna(method='bfill', limit=1)
df['forwardfill'] = df[column].fillna(method='ffill', limit=1)
is_really_nan = pd.isnull(df['backfill']) | pd.isnull(df['forwardfill'])
df[interpolated_column] = df[column].interpolate()
df.loc[is_really_nan,interpolated_column] = np.nan
df = df.drop(['forwardfill', 'backfill'], axis=1)
return df
In [ ]:
def clim_string(climatology_years):
return '{0}-{1}'.format(climatology_years[0], climatology_years[1])
def get_climatological_means(df, column, climatology_years):
clim = df[(df.index.year >= climatology_years[0]) & (df.index.year <= climatology_years[1] )].copy()
clim = clim.groupby([clim.index.month, clim.index.day]).mean()[[column]]
clim = clim.rename(columns={column: clim_string(climatology_years)})
return clim
In [ ]:
import calendar
month_names = [calendar.month_name[x] for x in range(1,13)]
def prepare_daily_dataframe(column, means, df_in, title):
df = df_in.copy()
df = df[[column]].set_index([df.index.year, df.index.month, df.index.day]).unstack(0)
df.columns = df.columns.droplevel(0)
space = means.copy()
space['1981-2010'] = " "
space.rename(columns={'1981-2010': ' '}, inplace=True)
df = pd.concat([df, space, means.copy()], axis=1)
df[column] = title
df.set_index(column, append=True, inplace=True)
df = df.unstack(column)
df.columns = df.columns.reorder_levels([column, None])
df.index = df.index.set_levels(month_names, level=0)
return df
In [ ]:
def to_mon_day_rows_year_cols(column, df, title):
a = df.copy()
a = a[[column]].set_index([a.index.year, a.index.month, a.index.day]).unstack(0)
a.columns.set_levels([title], level=0)
return a
In [ ]:
def compute_anomaly_from_extent_df(df, title):
a = df.copy()
values = np.array(a.iloc[:, 0:-2])
clim = np.array(a.iloc[:, -1])
means = np.tile(clim, (values.shape[1],1)).T
anomalies = values - means
a = pd.DataFrame(data=anomalies, index=a.index, columns=a.columns[0:-2])
a.columns = a.columns.set_levels([title], level=0)
return a
In [ ]:
def compute_extent_and_5day_extent_for_hemisphere(hemisphere):
df = read_a_hemisphere(hemisphere)
df = interpolate_column_excluding_extended_missing_periods(df, 'extent', 'interpolated')
df['5 Day'] = pd.rolling_mean(df['interpolated'], window=5, min_periods=2)
df['Daily Change'] = df['5 Day'].diff(periods=1)
daily_means = get_climatological_means(df, 'interpolated', climatology_years)
five_day_means = get_climatological_means(df, '5 Day', climatology_years)
extent = prepare_daily_dataframe('extent', daily_means , df, 'Daily Extents : with climatological means based on interpolated data')
daily_change = to_mon_day_rows_year_cols('Daily Change', df, 'Daily Extent Change for 5 Day Average Extent')
average_extent = prepare_daily_dataframe('5 Day', five_day_means, df, 'Daily 5 Day Extents : with climatological means based on 5 day data')
extent_anomaly = compute_anomaly_from_extent_df(extent, 'Extent Anomaly')
avg_extent_anomaly = compute_anomaly_from_extent_df(average_extent, '5 Day Avg Ext Anomaly')
return {'Ext': extent, '5 Day Avg Ext': average_extent,
'Ext Anomaly': extent_anomaly, '5 Day Avg Ext Anomaly': avg_extent_anomaly,
'Daily Change': daily_change}
In [ ]:
def write_hemisphere(writer, df, abbv):
df['Ext'].to_excel(writer,"{0} Ext".format(abbv),float_format = "%.3f", index=True)
df['5 Day Avg Ext'].to_excel(writer,"{0} 5 Day Avg Ext".format(abbv),float_format = "%.3f")
# don't do daily anomaly.
# df['Ext Anomaly'].to_excel(writer,"{} Ext Anomaly".format(abbv),float_format = "%.3f")
df['5 Day Avg Ext Anomaly'].to_excel(writer,"{0} 5 Day Avg Ext Anomaly".format(abbv),float_format = "%.3f")
df['Daily Change'].to_excel(writer, "{0} Daily Change".format(abbv), float_format = "%.3f")
workbook = writer.book
# add colors blue with blue
format1 = workbook.add_format({'bg_color': '#CEC7FF',
'font_color': '#06009C'})
# add colors red with red
format2 = workbook.add_format({'bg_color': '#FFC7CE',
'font_color': '#9C0006'})
sheets = ["{} Daily Change".format(abbv), "{} 5 Day Avg Ext Anomaly".format(abbv)]
for sheet in sheets:
worksheet = writer.sheets[sheet]
worksheet.conditional_format('C3:ZZ369', {'type': 'cell',
'criteria': '>',
'value': 0,
'format': format1})
worksheet.conditional_format('C3:ZZ369', {'type': 'cell',
'criteria': '<',
'value': 0,
'format': format2})
In [ ]:
north = compute_extent_and_5day_extent_for_hemisphere('north')
south = compute_extent_and_5day_extent_for_hemisphere('south')
In [ ]:
writer = ExcelWriter('../output/Sea_Ice_Extent_Daily.xls', engine='xlsxwriter')
write_hemisphere(writer, north, 'NH')
write_hemisphere(writer, south, 'SH')
writer.save()
In [ ]:
# cleanup
!cd ../data; rm -f NH_seaice_extent_final.csv NH_seaice_extent_nrt.csv SH_seaice_extent_final.csv SH_seaice_extent_nrt.csv
In [ ]: